In [50]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import linear_model
import statsmodels.api as sm
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load data: read the project CSV (path is relative to the working directory)
# into a pandas DataFrame.
city_data = pd.read_csv('USData_ClassProject1.csv')
In [55]:
# Response is the PercentOver65 column; the single predictor is AvgHighTemp.
y = city_data["PercentOver65"]
# add_constant returns the predictor together with an added constant column,
# so x is a two-column design matrix from here on.
x = sm.add_constant(city_data["AvgHighTemp"])
# NOTE(review): LinearRegression fits its own intercept by default, so the
# constant column added above looks redundant. It is kept because x is also
# used by name in the plotting code (x.AvgHighTemp) -- confirm before removing.
clf = linear_model.LinearRegression().fit(x, y)
# In-sample predictions: the fitted values for the same rows used to train.
y_hat = clf.predict(x)
In [70]:
# Scatter the observed data, then overlay the fitted values as a blue line.
plt.scatter(city_data.AvgHighTemp, city_data.PercentOver65, color="Black")
plt.plot(x.AvgHighTemp, y_hat, 'b', alpha=0.9)
plt.xlabel('AvgHighTemp')
plt.ylabel('PercentOver65')
# Report the in-sample R^2 of the fit (scored on the same x, y it was fit to).
# print is called as a function: the Python 2 print statement is a SyntaxError
# on Python 3 and would prevent this whole cell from running.
print("R^2: ", clf.score(x, y))